This is the first of two organizational scripts to look at DO output as a timesieries.

Reading in the data:

lapply(c("plyr","dplyr","ggplot2","cowplot","lubridate",
         "tidyverse","data.table","xts","dygraphs",
         "nrlmetab","cowplot"), require, character.only=T)

source("./saved_fxns/LM.o2.at.sat.R")
source("./saved_fxns/LM.wind.scale.R")

## MiniDOT sensors - Glenbrook
GBNS1 <- read.csv("./AggRawData/AggregatedDO/GBNS1.csv", header=T)
GBNS2 <- read.csv("./AggRawData/AggregatedDO/GBNS2.csv", header=T)
GBNS3 <- read.csv("./AggRawData/AggregatedDO/GBNS3.csv", header=T)

## MiniDOT sensors - Blackwood
BWNS1 <- read.csv("./AggRawData/AggregatedDO/BWNS1.csv", header=T)
BWNS2 <- read.csv("./AggRawData/AggregatedDO/BWNS2.csv", header=T)
BWNS3 <- read.csv("./AggRawData/AggregatedDO/BWNS3.csv", header=T)

## Weather station
east_ws <- read.csv("./AggRawData/Climate/EastNSclimate.csv")
west_ws <- read.csv("./AggRawData/Climate/WestNSclimate.csv")

Adjusting output files for relevant columns:

## Adjust miniDOT data
miniDOT_adj <- function(x){
  
  df <- x %>% dplyr::select(timestamp, DO, Temp, site) %>%
    dplyr::rename(datetime = timestamp, do.obs = DO, wtr = Temp, site = site) %>%
    mutate(datetime = as.POSIXct((datetime), format ="%Y-%m-%d %H:%M:%S"))
  
  return(df)
}
# Glenbrook sensors
GS1 <- miniDOT_adj(GBNS1)
GS2 <- miniDOT_adj(GBNS2)
GS3 <- miniDOT_adj(GBNS3)

# Blackwood sensors
BS1 <- miniDOT_adj(BWNS1)
BS2 <- miniDOT_adj(BWNS2)
BS3 <- miniDOT_adj(BWNS3)

Function plotting DO data and looking for unusual peaks:

## GS1 DO corrections
vis_data_DO(GS1)
GS1_adj <- GS1[-which(GS1$datetime >= "2021-10-29 11:30:00" & GS1$datetime <= "2021-10-29 14:00:00"),]
GS1_adj <- GS1[-which(GS1$datetime >= "2022-04-05 00:00:00" & GS1$datetime <= "2022-05-25 00:00:00"),]
vis_data_DO(GS1_adj)
## GS2 DO corrections
vis_data_DO(GS2)
GS2_adj <- GS2[-which(GS2$datetime >= "2021-08-24 00:00:00" & GS2$datetime <= "2021-10-02 00:00:00"),]
GS2_adj <- GS2_adj[-which(GS2_adj$datetime >= "2022-04-05 00:00:00" & GS2_adj$datetime <= "2022-05-25 00:00:00"),]
vis_data_DO(BS2)
## GS2 DO corrections
vis_data_DO(GS3)
GS3_adj <- GS3
# none
## GS2 DO corrections
vis_data_DO(BS1)
BS1_adj <- BS1[-which(BS1$datetime >= "2021-08-04 00:00:00" & BS1$datetime <= "2021-08-07 00:00:00"),]
BS1_adj <- BS1_adj[-which(BS1_adj$datetime >= "2021-08-15 00:00:00" & BS1_adj$datetime <= "2021-08-18 00:00:00"),]
BS1_adj <- BS1_adj[-which(BS1_adj$datetime >= "2021-08-20 00:00:00" & BS1_adj$datetime <= "2021-08-25 00:00:00"),]
BS1_adj <- BS1_adj[-which(BS1_adj$datetime >= "2021-09-17 00:00:00" & BS1_adj$datetime <= "2021-09-20 00:00:00"),]
BS1_adj <- BS1_adj[-which(BS1_adj$datetime >= "2021-09-26 00:00:00" & BS1_adj$datetime <= "2021-09-29 00:00:00"),]
BS1_adj <- BS1_adj[-which(BS1_adj$datetime >= "2021-10-16 00:00:00" & BS1_adj$datetime <= "2021-10-26 00:00:00"),]
BS1_adj <- BS1_adj[-which(BS1_adj$datetime >= "2022-03-28 00:00:00" & BS1_adj$datetime <= "2022-03-29 00:00:00"),]
vis_data_DO(BS1_adj)
## GS2 DO corrections
vis_data_DO(BS2)
BS2_adj <- BS2
# none
## GS2 DO corrections
vis_data_DO(BS3)
BS3_adj <- BS3[-which(BS3$datetime >= "2021-08-04 00:00:00" & BS3$datetime <= "2021-08-07 00:00:00"),]
vis_data_DO(BS3_adj)

Adjust weather station data for the appropriate variables

## Adjust climate data
climate_adj <- function(x){
  
  ## rename and convert solar_rad to PAR
  c <- x %>% 
    dplyr::rename(datetime = Date_Time, 
                  solar_rad = solar_radiation_set_1, 
                  wspeed = wind_speed_set_1) %>%
    mutate(datetime = as.POSIXct((datetime), format ="%Y-%m-%d %H:%M:%OS")) %>%
    mutate(par = solar_rad*2.114) %>%
    dplyr::select(datetime, par, wspeed)
  
  ## take the mean each hour
  c <- c %>%
    dplyr::group_by(datetime = lubridate::floor_date(datetime, "hour")) %>%
    dplyr::summarise(par = mean(par, na.rm = TRUE), wspeed = mean(wspeed, na.rm = TRUE))
  
  ## adjust datetime from UTC to local PT time
  c$datetime <- c$datetime - lubridate::hours(8)
  
  return(c)
}
east_clim <- climate_adj(east_ws)
west_clim <- climate_adj(west_ws)

Aggregate DO/temp data, calc o2_sat, adjust wspeed, merge with clim data

DOcalcs_merge <- function(DOT, clim){
  
  ## take the mean each hour for the cleaned miniDOT data & calc o2_sat
  DOT_df <- DOT %>%
    dplyr::group_by(datetime = floor_date(datetime, "hour")) %>%
    dplyr::summarise(do.obs = mean(do.obs, na.rm = TRUE), wtr = mean(wtr, na.rm = TRUE)) %>%
    mutate(do_eq=o2.at.sat.base(temp = wtr,altitude = 1897)) %>%  # Tahoe altitude in m  = 1897 
    mutate(o2_sat=do.obs/do_eq)
  
  ## convert wind speed to 10m wind (required for gas exchange models per LakeMetabolizer)
  clim_df <- clim %>%
    mutate(wspeed = wind.scale.base(wspeed, wnd.z = 304.8)) ## need to double check this?
  
  ## merge
  df_all <- merge(DOT_df, clim_df, by="datetime")
  
  ## subset time frame
  df_all <- df_all[which(df_all$datetime <= "2022-05-20 00:00:00"),]
  
  return(df_all)
  
}

Combine DO and climatological data

# Glenbrook
GS1_all <- DOcalcs_merge(GS1_adj, east_clim)
GS2_all <- DOcalcs_merge(GS2_adj, east_clim)
GS3_all <- DOcalcs_merge(GS3_adj, east_clim)

# Blackwood
BS1_all <- DOcalcs_merge(BS1_adj, west_clim)
BS2_all <- DOcalcs_merge(BS2_adj, west_clim)
BS3_all <- DOcalcs_merge(BS3_adj, west_clim)

rm(BWNS1, BS1,
   BWNS2, BS2,
   BWNS3, BS3,
   GBNS1, GS1,
   GBNS2, GS2,
   GBNS3, GS3)

Calc par_int for all sites using TERC Kd values

extcoef <- read.csv("./AggRawData/TERC_2021_Kd_coeffs.csv", header=T) 
## modify kd dataframe
head(extcoef)
##    datetime     PAR_Kd
## 1  7/1/2021 0.07549272
## 2 8/20/2021 0.09165585
## 3 8/23/2021 0.08386734
## 4 8/25/2021 0.08546171
## 5 8/27/2021 0.08900488
## 6  9/2/2021 0.08668523
# No time obs so dummy data set:
extcoef$datetime2 <- as.POSIXct(c(
  "2021-07-01 12:00:00",
  "2021-08-20 12:00:00",
  "2021-08-23 12:00:00",
  "2021-08-25 12:00:00",
  "2021-08-27 12:00:00",
  "2021-09-02 12:00:00",
  "2021-09-21 12:00:00",
  "2021-09-30 12:00:00"), 
  tz="America/Los_Angeles",
  format = c("%Y-%m-%d %H:%M:%OS"))

# separate by year 
extcoef22<-extcoef
extcoef22$datetime2 <- extcoef22$datetime2 %m+% years(1)
extcoef<- rbind(extcoef, extcoef22)
extcoef$year <- lubridate::year(extcoef$datetime2)

kd_O <- read.csv("./AggRawData/Climate/PAR_FA10B7CAD952_W_Kdest.csv")
kd_O<- na.omit(kd_O)

kd_O1 <- kd_O %>%
  mutate(datetime = as.POSIXct((datetime), format ="%Y-%m-%d %H:%M:%S")) 
kd_O1$hour <- lubridate::hour(kd_O1$datetime)
kd_O1$yday <- lubridate::yday(kd_O1$datetime)
kd_O1$year <- lubridate::year(kd_O1$datetime)


kd_O2 <- kd_O1 %>% 
  subset(hour>10 & hour<17) %>%
  group_by(datetime = floor_date(datetime, "day"), yday, year) %>%
  dplyr::summarise(PAR_Kd = mean(Kd2, na.rm = T))

extcoef$yday <- lubridate::yday(extcoef$datetime2)

extcoef22 <- extcoef %>%
  dplyr::select(datetime2, PAR_Kd, yday, year) %>%
  dplyr::rename(datetime = datetime2)

kd_O3<- rbind(kd_O2, extcoef22)

Data frame for interpolation of PAR observations.

## create data frame for interpolation
Kd_int21 <- as.data.frame(cbind("year" = rep(2021,length(seq(from = extcoef$yday[1], to = 365))), ## you added the :2022
                              "yday" = seq(from = extcoef$yday[1], to = 365)))

Kd_int22 <- as.data.frame(cbind("year" = rep(2022,length(seq(1:365))), ## you added the :2022
                                "yday" = seq(1:365)))

##
Kd_int21 <- left_join(Kd_int21, kd_O3[,c("year", "yday","PAR_Kd")], by=c("year", "yday"), all = T)


Kd_int22 <- left_join(Kd_int22, kd_O3[,c("year", "yday","PAR_Kd")], by=c("year", "yday"), all = T)

Kd_int22all<-rbind(Kd_int21, Kd_int22)

## interpolate between Kd values to create a daily time series
Kd_int <- Kd_int22all %>%
  mutate(PAR_Kd = na.approx(PAR_Kd, maxgap = Inf, na.rm=F))
#check
plot(Kd_int$PAR_Kd)

sapply(Kd_int, class)
##      year      yday    PAR_Kd 
## "numeric" "numeric" "numeric"

Merge par data with DO data.

### Apply these Kd values to all time series
calc_par_int <-function(df, Kd_df){
  
  ## calc yday for data frame
  df$yday <- lubridate::yday(df$datetime)
  
  ## merge Kd time series with data
  dat <- left_join(df, Kd_df, by=("yday"))
  
  ## calc par_int
  dat <- dat %>%
    mutate(par_int = (par - par*exp(-PAR_Kd*3))/(PAR_Kd*3)) # multipler = "(extcoef*3)" should be depth of water column (3m in this case)
  
  #filter dates
  dat <- dat %>%
    filter(yday >= 1 & yday <=365)

  return(dat)
  
}

Plot function for new data sets

GS1_all <- calc_par_int(GS1_all, Kd_int)
GS2_all <- calc_par_int(GS2_all, Kd_int)
GS3_all <- calc_par_int(GS3_all, Kd_int)

BS1_all <- calc_par_int(BS1_all, Kd_int)
BS2_all <- calc_par_int(BS2_all, Kd_int)
BS3_all <- calc_par_int(BS3_all, Kd_int)
## visualize
visualize_input_data <-function(df){
  
  plot_grid(
    ggplot(data = df, aes(x=datetime,y=par_int)) + geom_point(),
    ggplot(data = df, aes(x=datetime,y=do.obs)) + geom_point(),
    ggplot(data = df, aes(x=datetime,y=wtr)) + geom_point(),
    ggplot(data = df, aes(x=datetime,y=wspeed)) + geom_point(),
    ncol = 1, align = "v")
  
}
# plot BW input data
visualize_input_data(BS1_all)

visualize_input_data(BS2_all)

visualize_input_data(BS3_all)

# plot GS input data
visualize_input_data(GS1_all)

visualize_input_data(GS2_all)

visualize_input_data(GS3_all)

Optional subset for data chunks

Save modified data files in local path